## author:    A. Dür, Robert A. Huber, Gemma Mateo, Gabriele Spilker
## contact:   robert.huber@sbg.ac.at
## file name: nti_sample.R
## Context:   Project on NTI in PTAs
## started:   2020-03-09
## Summary:   Provides information on sample quality

# Read the full survey data; this data frame is referred to as the
# "population" in the comparisons further down.
population <- read.csv(file = "data/full_survey_data.csv")

# The sample holds the rows with responded == 1. which() discards rows where
# `responded` is NA, which is also what subset() does.
sample <- population[which(population$responded == 1), , drop = FALSE]

#### Comparison on key dimensions ####

####..Group Type####
# Two-sample proportion test per group type: share of the type in the
# population vs. its share in the sample.
#
# Both data sets are tabulated over the same set of levels (those observed in
# the population). A type without any respondent is therefore counted as 0
# instead of shifting the positions of the remaining types, which would make
# the test compare two different categories.
#
# NOTE(review): the denominators are the full row counts, so rows with a
# missing type (if any) stay in the denominator - confirm this is intended.
type_levels <- sort(unique(population$type))
type_n_population <- table(factor(population$type, levels = type_levels))
type_n_sample <- table(factor(sample$type, levels = type_levels))

for (lvl in names(type_n_population)) {
  # Results are printed explicitly because auto-printing does not happen
  # inside a loop; the header identifies the category being tested.
  cat("\nGroup type: ", lvl, "\n", sep = "")
  print(prop.test(x = c(type_n_population[[lvl]], type_n_sample[[lvl]]),
                  n = c(nrow(population), nrow(sample))))
}

####..Continent####
# Map headquarter names to continents.
#
# countrycode() translates country names; the entries listed below are then
# assigned by hand, overriding whatever countrycode() returned for them.
# Headquarters that are NA stay NA.
#
# @param headquarter Vector of headquarter names.
# @return Character vector of continent names, same length as `headquarter`.
recode_continent <- function(headquarter) {
  continent <- countrycode::countrycode(sourcevar = headquarter,
                                        origin = "country.name",
                                        destination = "continent")

  ifelse(headquarter == "Caribbean", "Americas",
         ifelse(headquarter %in% c("Deutchland", "Kosovo", "EU"), "Europe",
                continent))
}

# The same recoding is applied to both data sets so they cannot drift apart.
population$continent <- recode_continent(population$headquarter)
sample$continent <- recode_continent(sample$headquarter)

# Two-sample proportion test per continent. Counts are tabulated over the
# continents observed in the population for both data sets, so a continent
# without respondents is counted as 0 rather than shifting the positions of
# the remaining continents.
#
# NOTE(review): the denominators are the full row counts, so rows without a
# continent stay in the denominator - confirm this is intended.
continent_levels <- sort(unique(population$continent))
continent_n_population <- table(factor(population$continent, levels = continent_levels))
continent_n_sample <- table(factor(sample$continent, levels = continent_levels))

for (lvl in names(continent_n_population)) {
  # Explicit print(): auto-printing does not happen inside a loop.
  cat("\nContinent: ", lvl, "\n", sep = "")
  print(prop.test(x = c(continent_n_population[[lvl]], continent_n_sample[[lvl]]),
                  n = c(nrow(population), nrow(sample))))
}

####..Source####

# Two-sample proportion test per source: share of the source in the
# population vs. its share in the sample.
#
# Counts are tabulated over the sources observed in the population for both
# data sets, so a source without respondents is counted as 0 rather than
# shifting the positions of the remaining sources.
#
# NOTE(review): the denominators are the full row counts, so rows with a
# missing source (if any) stay in the denominator - confirm this is intended.
source_levels <- sort(unique(population$source))
source_n_population <- table(factor(population$source, levels = source_levels))
source_n_sample <- table(factor(sample$source, levels = source_levels))

for (lvl in names(source_n_population)) {
  # Explicit print(): auto-printing does not happen inside a loop.
  cat("\nSource: ", lvl, "\n", sep = "")
  print(prop.test(x = c(source_n_population[[lvl]], source_n_sample[[lvl]]),
                  n = c(nrow(population), nrow(sample))))
}

####..Regression test####

# Binomial GLMs of the response dummy, one group characteristic per model.
# All three are fitted on the full population data.

# Model 1: group type, entered as a categorical predictor
m_type <- glm(
  formula = responded ~ as.factor(type),
  family = "binomial",
  data = population
)
summary(m_type)

# Model 2: continent of the headquarter
m_conti <- glm(
  formula = responded ~ continent,
  family = "binomial",
  data = population
)
summary(m_conti)

# Model 3: source of the group
m_source <- glm(
  formula = responded ~ source,
  family = "binomial",
  data = population
)
summary(m_source)

# Export the three response models as one LaTeX table (Table A3).
# Logical arguments are spelled TRUE/FALSE: T and F are ordinary variables
# that can be reassigned, TRUE and FALSE cannot.
# NOTE(review): "logist" in the table note below looks like a typo for
# "logistic"; the string is left untouched so the generated table does not
# change - confirm before editing.
texreg::texreg(list(m_type, m_conti, m_source),
               file = "./output/TableA3.tex",
               single.row = TRUE,
               custom.model.names = c("Type", "Continent", "Source"),
               # One label per coefficient row; NA leaves the first row
               # (presumably the intercept) with its original name.
               custom.coef.names = c(NA, "Labour Union", "Professional Assoc.", "Business Group",
                                     "Americas", "Asia", "Europe", "Oceania",
                                     "Civil Society Dialog", "Transparency Register",
                                     "UN", "WTO"),
               float.pos = "htb",
               caption = "Response Behaviour by Group",
               caption.above = TRUE,
               label = "tab:response",
               custom.note = "\\parbox{.7\\linewidth}{%stars. Entries are unstandardised coefficients from a logist regression. Standard errors in parentheses. The dependent variable is a dummy whether a group responded or not.}",
               leading.zero = TRUE,
               digits = 2,
               dcolumn = TRUE,
               use.packages = FALSE,
               # A single significance threshold for the stars
               stars = .05)

####..Graph####

# Shares of every category, first for the population and then for the sample.
# Each half is expected to hold 14 values (4 group types, 5 continents,
# 5 sources) so that it lines up with the labels defined below.
shares <- c(prop.table(table(population$type)),
            prop.table(table(population$continent)),
            prop.table(table(population$source)),
            prop.table(table(sample$type)),
            prop.table(table(sample$continent)),
            prop.table(table(sample$source)))

# Standard error of a share, computed from the logical membership indicator.
# Missing values are dropped first, so the standard error is based on the same
# observations as the shares above (table() ignores NA as well) and does not
# turn into NA as soon as one value is missing.
share_se <- function(x) {
  x <- x[!is.na(x)]
  sd(x) / sqrt(length(x))
}

# Standard errors for all `levels` of one variable, in the given order.
se_by_level <- function(values, levels) {
  vapply(levels,
         function(lvl) share_se(values == lvl),
         numeric(1),
         USE.NAMES = FALSE)
}

# No standard errors for the 14 population shares; for the sample there is one
# per category, in the same order as the shares. Group type 2 previously
# reused the indicator of type 1 - every type now uses its own indicator.
sem <- c(rep(NA, 14),
         se_by_level(sample$type, 1:4),
         se_by_level(sample$continent,
                     c("Africa", "Americas", "Asia", "Europe", "Oceania")),
         se_by_level(sample$source, c("as", "cs", "tr", "un", "wt")))

# Axis labels for the 14 categories, repeated for population and sample
type <- rep(c("Citizen Group", "Labour Union", "Professional Assoc.", "Business Group",
              "Africa", "Americas", "Asia", "Europe", "Oceania",
              "ASEAN", "Civil-Society Dialog", "Transparency Register", "UN", "WTO"), 2)

# Break labels at spaces so that multi-word labels wrap
type <- gsub(pattern = " ", replacement = "\n", type)

group <- rep(c("Population", "Sample"), each = 14)

# Facet label for each of the 14 categories, repeated for both groups
category <- rep(c(rep("1) Group Type", 4), rep("2) Continent", 5), rep("3) Source", 5)), 2)

df_plot <- data.frame(shares, sem, type, group, category)

# Figure A1: dodged bars of population vs. sample shares, one facet per
# category. Error bars span +/- 1.96 standard errors; rows whose `sem` is NA
# get no usable error bar.
#
# ggplot2 functions are namespace-qualified, like the other package calls in
# this script, so the figure does not depend on ggplot2 being attached.
# The constant error-bar width is set outside aes() because it is not mapped
# to data.
#
# Disabled styling options, kept for reference:
#   ggthemes::scale_fill_colorblind("")
#   theme(panel.border = element_rect(colour = "black", fill = NA, size = 1))
fig_shares <- ggplot2::ggplot(df_plot,
                              ggplot2::aes(x = type, y = shares, group = group, fill = group)) +
  ggplot2::geom_bar(stat = "identity",
                    position = ggplot2::position_dodge(width = 1),
                    alpha = .5) +
  ggplot2::geom_errorbar(ggplot2::aes(ymin = shares - 1.96 * sem,
                                      ymax = shares + 1.96 * sem),
                         width = .5,
                         position = ggplot2::position_dodge(width = 1)) +
  ggplot2::facet_wrap(category ~ ., scales = "free_x") +
  ggplot2::theme_minimal() +
  ggplot2::labs(x = "Group", y = "Share") +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = .5),
                 legend.position = "none")

# Display the figure, as the original top-level expression did
fig_shares

# Pass the plot explicitly instead of relying on the last plot created
ggplot2::ggsave("./output/FigureA1.pdf",
                plot = fig_shares,
                width = 21,
                height = 12,
                units = "cm")

